User Data Analytics
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.4 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(flextable)
##
## Attaching package: 'flextable'
## The following objects are masked from 'package:kableExtra':
##
## as_image, footnote
## The following object is masked from 'package:purrr':
##
## compose
library(gmodels)
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
library(statar)
library(ggpubr)
##
## Attaching package: 'ggpubr'
## The following objects are masked from 'package:flextable':
##
## border, font, rotate
salad_coupon <- read.csv("/Users/bowenjin/Desktop/Lion's Choice/rfm_trans.csv")
salad_coupon
dim(salad_coupon)
## [1] 25763 6
#response rate of the coupon
CrossTable(salad_coupon$X13....2.Off.Butcher.Block.Sala.13...Redeemed)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 25763
##
##
## | 0 | 1 |
## |-----------|-----------|
## | 25636 | 127 |
## | 0.995 | 0.005 |
## |-----------|-----------|
##
##
##
##
Response rate is 0.5%
# Create the quintiles for R, F, M
salad_coupon1 <- salad_coupon %>%
summarise(CardNumber = Card.Number,
recency = Last.Guest.Activity.Date,
frequency = X10...Visits.10...Balance,
monetary = X1...Dollars.Spent.1...Balance,
coupon = X13....2.Off.Butcher.Block.Sala.13...Redeemed,
rec_quin = xtile(Last.Guest.Activity.Date, 5),
freq_quin = xtile(X10...Visits.10...Balance, 5),
mv_quin = xtile(X1...Dollars.Spent.1...Balance, 5))
salad_coupon1
#check and adjust ranking for R, F, M
#Recency rank
salad_coupon1 %>% group_by(rec_quin) %>% summarise(avg_rec = mean(recency), .groups="drop")
#Frequency rank adjust
salad_coupon1 %>% group_by(freq_quin) %>% summarise(avg_freq = mean(frequency), .groups="drop")
salad_coupon1$freq_quin <- max(salad_coupon1$freq_quin) + 1 - salad_coupon1$freq_quin
salad_coupon1 %>% group_by(freq_quin) %>% summarise(avg_freq = mean(frequency), .groups="drop")
#Monetary rank adjust
salad_coupon1 %>% group_by(mv_quin) %>% summarise(avg_mv = mean(monetary), .groups="drop")
salad_coupon1$mv_quin <- max(salad_coupon1$mv_quin) + 1 - salad_coupon1$mv_quin
salad_coupon1 %>% group_by(mv_quin) %>% summarise(avg_mv = mean(monetary), .groups="drop")
#create rfm index
salad_coupon1 <- salad_coupon1 %>%
mutate(rfmindex_iq = 100*rec_quin + 10*freq_quin + mv_quin)
salad_coupon1
#response rate in each RFM group
avg_resp_rate_rfm <- salad_coupon1 %>%
group_by(rfmindex_iq) %>%
summarise(resp_rate_rfm_iq=mean(coupon), .groups="drop") %>%
arrange(desc(resp_rate_rfm_iq))
avg_resp_rate_rfm
bar_avg_resp_rate_rfm <-
ggplot(data=avg_resp_rate_rfm,
aes(x = as.factor(rfmindex_iq), y = resp_rate_rfm_iq)) +
labs(x="RFM Cells",
y="Average Response Rate",
title = "Response Rates by Independent RFM Cells") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_bar(stat="identity") +
scale_x_discrete(breaks = seq(111, 555, by = 5))
bar_avg_resp_rate_rfm
#response rate for every member
salad_coupon1 <- salad_coupon1 %>%
group_by(rfmindex_iq) %>%
mutate(resp_rate_by_rfm_iq = mean(coupon)) %>% ungroup()
salad_coupon1